1 Required libraries

The required libraries are loaded - RomicsProcessor written by Geremy Clair (2020) is used to perform trackable transformation and statistics to the dataset - proteinminion written by Geremy Clair (2020) is used to extract fasta information and to perform gene ontology and KEGG pathways enrichement analysis (2020)

library("RomicsProcessor")
library("proteinminion")
library("DT") #for the rendering of the enrichment tables 

2 Fasta and protein ontologies download using ‘Protein Mini-On’

Using the package ‘Protein Mini-on’ (Geremy Clair 2020, in prep.), The fasta file was downloaded from Unipro for the human and bovine proteome on the Jun 15th, 2020

if(!file.exists("./03_output_files/Uniprot_Homo_sapiens_proteome_UP000005640_2021_03_23.fasta")){
    download_UniProtFasta(proteomeID = "UP000005640",reviewed = F,export = TRUE, file="./03_output_files/Uniprot_Homo_sapiens_proteome_UP000005640_2021_03_23.fasta")
    UniProtFasta_info<-UniprotFastaParser(file = "./03_output_files/Uniprot_Homo_sapiens_proteome_UP000005640_2021_03_23.fasta")
    write.csv(UniProtFasta_info, "./03_output_files/UniProtFasta_info.csv")
}

For each entry, ‘Protein Mini-On’ was use to download Gene Ontology (GO) terms and KEGG ids associated with the proteins. This upload was performed the exact same day as the download of the fasta file was done to ensure that the IDs will be identical as the ones present in the fasta file used).

if(file.exists("./03_output_files/UniprotTable_Homo_sapiens_proteome_UP000005640_2020_07_29.csv")){
  UniProtTable<-read.csv("./03_output_files/UniprotTable_Homo_sapiens_proteome_UP000005640_2020_07_29.csv")
  }else{
  download_UniProtTable(proteomeID = "UP000005640",reviewed = F)
  write.csv(UniProtTable,("./03_output_files/UniprotTable_Homo_sapiens_proteome_UP000005640_2020_07_29.csv"),row.names=FALSE)
  }

‘Protein-Mini-on’ was then used to generate tables containing the list of GOs, KEGG and reactome pathways and their associated protein IDs

if(file.exists("./03_output_files/UniProtTable_GO.csv")){
  UniProtTable_GO<-read.csv(file="./03_output_files/UniProtTable_GO.csv")
  UniProtTable_KEGG<-read.csv(file="./03_output_files/UniProtTable_KEGG.csv")
  UniProtTable_REACTOME<-read.csv(file="./03_output_files/UniProtTable_REACTOME.csv")
}else{
proteinminion::generate_UniProtTables()
write.csv(UniProtTable_GO,file="./03_output_files/UniProtTable_GO.csv",row.names=FALSE)
write.csv(UniProtTable_KEGG,file="./03_output_files/UniProtTable_KEGG.csv",row.names=FALSE)
write.csv(UniProtTable_REACTOME,file="./03_output_files/UniProtTable_REACTOME.csv",row.names=FALSE)
}

3 MaxQuant import

The LFQ data contained in the protein table was loaded, the corresponding metadata was loaded

data<-extractMaxQuant("./01_source_files/proteinGroups.txt",quantification_type = "LFQ",cont.rm = T,site.rm = T,rev.rm = T)
## [1] "115  Proteins were removed (protein(s) only identified by site,contaminant(s),reverse hit(s))"
## [1] "LFQ quantification was used"
IDsdetails<-extractMaxQuantIDs("./01_source_files/proteinGroups.txt",cont.rm = T,site.rm = T,rev.rm = T)
## [1] "115  Proteins were removed (protein(s) only identified by site,contaminant(s),reverse hit(s))"
IDsdetails<-cbind(UniProt_Name=sub(".*\\|","",IDsdetails$protein.ids), IDsdetails)
IDsdetails$representative_protein<-gsub(";.*","",IDsdetails$UniProt_Name)
IDsdetails<-merge(IDsdetails,UniProtTable,by.x="representative_protein",by.y="Uniprot_Accession")
IDsdetails$Gene<-IDsdetails$Gene_Name
IDsdetails$Gene[IDsdetails$Gene==""]<-paste0("undefined_gene_name_",IDsdetails$representative_protein[IDsdetails$Gene==""])
IDsdetails$representative_gene<-gsub(" .*","",IDsdetails$Gene)
IDsdetails<-cbind(IDsdetails[,2:ncol(IDsdetails)],representative_protein=IDsdetails[,1])

metadata<- read.csv(file = "./01_source_files/metadata.csv")
colnames(metadata)<- sub("p","",colnames(data))
colnames(metadata)<-tolower(colnames(metadata))
colnames(metadata)<-sub("x", "",colnames(metadata))
write.csv(IDsdetails,"MaxQuantIDS.csv")

First let’s evaluate the normality of the surface_density and of the percent_tissue

surface_density<-as.numeric(metadata[7,-1])
hist(surface_density,breaks = 8)

t<-shapiro.test(surface_density)
print(paste0("The surface density normality Shapiro-Wilk p=",t$p.value))
## [1] "The surface density normality Shapiro-Wilk p=0.149765528866248"
percent_tissue<-as.numeric(metadata[8,-1])
hist(percent_tissue,breaks = 8)

t<-shapiro.test(percent_tissue)
print(paste0("The surface density normality Shapiro-Wilk p=",t$p.value))
## [1] "The surface density normality Shapiro-Wilk p=0.0164815885642291"

The surface density was non normal we log-transformed the value and tested for normality again

log_surface_density<-log(surface_density)
hist(log_surface_density,breaks = 8)

t<-shapiro.test(log_surface_density)
print(paste0("The surface density normality Shapiro-Wilk p=",t$p.value))
## [1] "The surface density normality Shapiro-Wilk p=0.00164315430349359"

the log_surface_density was normal and was therefore added to the data

metadata<-rbind(metadata,c("log_surface_density",log_surface_density))

4 Romics_object creation

The data and metadata were placed in an romics_object, the sample names were retrieved from the metadata, the condition will be use for the coloring of the Figure and statistics

romics_proteins<- romicsCreateObject(data, metadata,main_factor = "disease_status",IDs = IDsdetails)

5 Full data analysis

5.1 Data cleaning and normalization

The missingness was evaluated for each channel/sample

romics_proteins<- romicsZeroToMissing(romics_proteins)
romicsPlotMissing(romics_proteins)

The proteins to be conserved for quantification were selected to contain at least 70% of complete values (3/4 samples) for a given condition, the overall missingness was evaluated after filtering.

romics_proteins<-romicsFilterMissing(romics_proteins, percentage_completeness = 70)
## [1] "1983 rows were removed for the data"
## [1] "Based on the minimum completeness set at 70%"
## [1] "at least the following number of sample(s) containing data was required:"
## Ctrl  IPF 
##    7   21 
## [1] "1922/3905 features remained after filtering (49.22%)."
print(paste0(nrow(romics_proteins$data),"/", nrow(romics_proteins$original_data)," proteins remained after filtering", " (",round(nrow(romics_proteins$data)/nrow(romics_proteins$original_data)*100,2),"%)."))
## [1] "1922/3905 proteins remained after filtering (49.22%)."
romicsPlotMissing(romics_proteins)

As the same quantity of protein was labelled for each sample, the expectation is that the distribution of the protein abundance is centered, therefore a median centering was performed prior to plot again the distribution boxplots.

romics_proteins<-log2transform(romics_proteins)
romics_proteins<-medianCenterSample(romics_proteins)
distribBoxplot(romics_proteins)

5.2 Grouping evaluation

The grouping of the samples by is checked by hierarchical clustering

romicsHclust(romics_proteins)

romicsHclust(romicsChangeFactor(romics_proteins,"disease_lvl"))

5.3 Data imputation

For some of the subsequent statistics imputations are required, we performed an imputation by assuming that the “non-detected” proteins were either low abundance or missing using the method developped by Tyranova et al. (PMID: 27348712). The gray distribution is the data distribution, the yellow distribution is the one for the random values used for imputation.

imputeMissingEval(romics_proteins,nb_stdev = 2,width_stdev = 0.5, bin=1)

romics_proteins<-imputeMissing(romics_proteins,nb_stdev = 2,width_stdev = 0.5)

A subset containing only the IPF samples was created

romics_IPF<-romicsSubset(romics_proteins,subset_vector = "IPF",type = "keep",by = "level",factor = "disease_status")
romics_IPF<-romicsChangeFactor(romics_IPF,"disease_lvl")

The hclust and PCA grouping were checked again after imputation

PCA_proteins<-romicsPCA(romics_proteins)
indPCAplot(romics_proteins, plotType = "percentage")

indPCAplot(romics_proteins, plotType = "individual",Xcomp=1,Ycomp =2)

indPCAplot(romics_proteins,  plotType = "individual",Xcomp=1,Ycomp =3)

indPCA3D(romics_proteins)
indPCA3D(romicsChangeFactor(romics_proteins,"disease_lvl"))
indPCAplot(romics_IPF, plotType = "individual",Xcomp=1,Ycomp =2)

indPCAplot(romics_IPF, plotType = "individual",Xcomp=1,Ycomp =3)

indPCA3D(romics_IPF)

Let’s first evaluate the number of clusters that should be used for the samples

fviz_nbclust(t(romics_proteins$data), kmeans, method = "silhouette", k.max = 10) + theme_bw() + ggtitle("The Silhouette Plot")

fviz_nbclust(t(romics_proteins$data), kmeans, method = "wss")+
geom_vline(xintercept = 3, linetype = 2)

library(cluster)
gap_stat <- clusGap(t(romics_proteins$data), FUN = kmeans, nstart = 25,
 K.max = 10, B = 10)
 print(gap_stat, method = "firstmax")
## Clustering Gap statistic ["clusGap"] from call:
## clusGap(x = t(romics_proteins$data), FUNcluster = kmeans, K.max = 10,     B = 10, nstart = 25)
## B=10 simulated reference sets, k = 1..10; spaceH0="scaledPCA"
##  --> Number of clusters (method 'firstmax'): 3
##           logW   E.logW       gap     SE.sim
##  [1,] 6.741987 6.975959 0.2339717 0.01618882
##  [2,] 6.570249 6.868163 0.2979146 0.01210903
##  [3,] 6.472499 6.815738 0.3432384 0.01254660
##  [4,] 6.427634 6.768607 0.3409728 0.01488539
##  [5,] 6.387180 6.724190 0.3370101 0.01531711
##  [6,] 6.347208 6.681135 0.3339269 0.01541887
##  [7,] 6.307536 6.638092 0.3305558 0.01609475
##  [8,] 6.268323 6.596322 0.3279990 0.01603414
##  [9,] 6.228380 6.554911 0.3265311 0.01719211
## [10,] 6.186224 6.511944 0.3257197 0.01709298
fviz_gap_stat(gap_stat)

A silhouette analysis indicates that two main clusters exist in the dataset

set.seed(seed = 1)
k<-kmeans(t(romics_proteins$data),2,iter.max = 50)

print("the clusters were the following")
## [1] "the clusters were the following"
print(k$cluster)
## X120_101 X120_107  X120_21  X127_10  X127_13  X127_26  X190_28  X190_56 
##        1        1        1        2        1        1        1        1 
##  X190_59  X204_26  X204_48  X204_75  X224_41  X224_48  X224_84 X253_117 
##        1        1        1        1        1        1        1        2 
##  X253_72  X253_86   X268_3  X268_70  X268_76  X319_42  X319_49   X319_6 
##        2        1        1        1        1        1        1        1 
##  X323_37  X323_42  X323_71  X325_36  X325_61  X325_70  X145_94  X221_35 
##        1        1        1        1        1        1        2        2 
##  X222_66  X248_95 X249_117  X274_87  X302_16  X304_71   X62_27  X94_124 
##        2        2        2        2        2        2        2        2
print("Those were used to color the PCA plot")
## [1] "Those were used to color the PCA plot"
PC_coord<-data.frame(PCA_proteins$ind$coord[,1:2])
PC_coord<-cbind(PC_coord,k=as.character(k$cluster))

ggplot(PC_coord, aes(x=Dim.1,y=Dim.2,color=k)) + 
  geom_point(size = 4)+
  ggtitle("Principal Component Analysis (2 clusters)")+
  scale_color_manual(values=c("dodgerblue", "orangered"))+
  xlab(paste0("PC1(",round(as.numeric(PCA_proteins$eig[1,2]),2),"%)"))+
  ylab(paste0("PC2(",round(as.numeric(PCA_proteins$eig[2,2]),2),"%)"))+
    theme_bw()

Now we will evaluate if there are difference in the clusters log(surface density)

#get the sample names for the samples in each cluster
Clust1 <-names(k$cluster[k$cluster==1])
Clust2 <-names(k$cluster[k$cluster==2])

Clust1 <-gsub("X","",Clust1)
Clust2 <-gsub("X","",Clust2)

#obtain the surface density of the clusters 1 and 2
Surf1 <- as.numeric(metadata[8,colnames(metadata) %in% Clust1])
Surf2 <- as.numeric(metadata[8,colnames(metadata) %in% Clust2])

t<-wilcox.test(Surf1,Surf2)

print(paste0("The t.test pvalue between Clust 1 and 2 is ",t$p.value))
## [1] "The t.test pvalue between Clust 1 and 2 is 0.00368418157314144"
Surf1 <- data.frame(values=Surf1,clust="1")
Surf2 <- data.frame(values=Surf2,clust="2")
Surf <- rbind(Surf1,Surf2)

ggplot(Surf, aes(x=clust, y=values,color=clust,fill=clust))+ 
  geom_boxplot(width=0.4)+
  geom_jitter(shape=16,size=4)+
  scale_color_manual(values=c("dodgerblue", "orangered"))+
  scale_fill_manual(values=alpha(c("dodgerblue", "orangered"), .3))+
  theme_bw()

similarily the evaluation was made for the percent_tissue

#get the sample names for the samples in each cluster
Clust1 <-names(k$cluster[k$cluster==1])
Clust2 <-names(k$cluster[k$cluster==2])

Clust1 <-gsub("X","",Clust1)
Clust2 <-gsub("X","",Clust2)

#obtain the surface density of the clusters 1 and 2
Perc1 <- as.numeric(metadata[7,colnames(metadata) %in% Clust1])
Perc2 <- as.numeric(metadata[7,colnames(metadata) %in% Clust2])

t<-wilcox.test(Perc1,Perc2)

print(paste0("The wilcox.test pvalue between Clust 1 and 2 is ",t$p.value))
## [1] "The wilcox.test pvalue between Clust 1 and 2 is 7.18178337273514e-07"
Perc1 <- data.frame(values=Perc1,clust="1")
Perc2 <- data.frame(values=Perc2,clust="2")
Perc<- rbind(Perc1,Perc2)

ggplot(Perc, aes(x=clust, y=values,color=clust,fill=clust))+ 
  geom_boxplot(width=0.4)+
  geom_jitter(shape=16,size=4)+
  scale_color_manual(values=c("dodgerblue", "orangered"))+
  scale_fill_manual(values=alpha(c("dodgerblue", "orangered"), .3))+
  theme_bw()

the elbow method and gap statistics indicate 3 clusters

k<-kmeans(t(romics_proteins$data),3)

print("the clusters were the following")
## [1] "the clusters were the following"
print(k$cluster)
## X120_101 X120_107  X120_21  X127_10  X127_13  X127_26  X190_28  X190_56 
##        1        2        1        1        2        2        1        1 
##  X190_59  X204_26  X204_48  X204_75  X224_41  X224_48  X224_84 X253_117 
##        2        2        2        1        2        1        1        1 
##  X253_72  X253_86   X268_3  X268_70  X268_76  X319_42  X319_49   X319_6 
##        1        1        1        1        1        2        2        2 
##  X323_37  X323_42  X323_71  X325_36  X325_61  X325_70  X145_94  X221_35 
##        2        1        2        2        2        2        3        3 
##  X222_66  X248_95 X249_117  X274_87  X302_16  X304_71   X62_27  X94_124 
##        3        3        3        3        3        3        3        3
print("Those were used to color the PCA plot")
## [1] "Those were used to color the PCA plot"
PC_coord<-data.frame(PCA_proteins$ind$coord[,1:2])
PC_coord<-cbind(PC_coord,k=as.character(k$cluster))

ggplot(PC_coord, aes(x=Dim.1,y=Dim.2,color=k)) + 
  geom_point(size = 4,colours=c("yellow","blue","orange"))+
  ggtitle("Principal Component Analysis (3 clusters)")+
  scale_color_manual(values=c("dodgerblue", "orangered", "gray50"))+
  xlab(paste0("PC1(",round(as.numeric(PCA_proteins$eig[1,2]),2),"%)"))+
  ylab(paste0("PC2(",round(as.numeric(PCA_proteins$eig[2,2]),2),"%)"))+
    theme_bw()

Now we will evaluate if there are difference in the clusters log surface density

#get the sample names for the samples in each cluster
Clust1 <-names(k$cluster[k$cluster==1])
Clust2 <-names(k$cluster[k$cluster==2])
Clust3 <-names(k$cluster[k$cluster==3])

Clust1 <-gsub("X","",Clust1)
Clust2 <-gsub("X","",Clust2)
Clust3 <-gsub("X","",Clust3)

#obtain the surface density of the clusters 1 and 2
Surf1 <- as.numeric(metadata[8,colnames(metadata) %in% Clust1])
Surf2 <- as.numeric(metadata[8,colnames(metadata) %in% Clust2])
Surf3 <- as.numeric(metadata[8,colnames(metadata) %in% Clust3])

t1_2<-wilcox.test(Surf1,Surf2)
print(paste0("The wilcox.test pvalue between Clust 1 and 2 is ",t1_2$p.value))
## [1] "The wilcox.test pvalue between Clust 1 and 2 is 0.967417297543179"
t2_3<-wilcox.test(Surf2,Surf3)
print(paste0("The wilcox.test pvalue between Clust 2 and 3 is ",t2_3$p.value))
## [1] "The wilcox.test pvalue between Clust 2 and 3 is 5.93497228306759e-05"
t1_3<-wilcox.test(Surf1,Surf3)
print(paste0("The wilcox.test pvalue between Clust 1 and 3 is ",t1_3$p.value))
## [1] "The wilcox.test pvalue between Clust 1 and 3 is 0.000531700094225333"
Surf1 <- data.frame(values=Surf1,clust="1")
Surf2 <- data.frame(values=Surf2,clust="2")
Surf3 <- data.frame(values=Surf3,clust="3")

Surf <- rbind(Surf1,Surf2,Surf3)

ggplot(Surf, aes(x=clust, y=values,color=clust,fill=clust))+ 
  geom_boxplot(width=0.4)+
  geom_jitter(shape=16,size=4)+
  scale_color_manual(values=c("dodgerblue", "orangered","gray50"))+
  scale_fill_manual(values=alpha(c("dodgerblue", "orangered","gray50"), .3))+
  theme_bw()

similarily the evaluation was made for the percent_tissue

#get the sample names for the samples in each cluster
Clust1 <-names(k$cluster[k$cluster==1])
Clust2 <-names(k$cluster[k$cluster==2])
Clust3 <-names(k$cluster[k$cluster==3])
Clust1 <-gsub("X","",Clust1)
Clust2 <-gsub("X","",Clust2)
Clust3 <-gsub("X","",Clust3)


#obtain the surface density of the clusters 1 and 2
Perc1 <- as.numeric(metadata[7,colnames(metadata) %in% Clust1])
Perc2 <- as.numeric(metadata[7,colnames(metadata) %in% Clust2])
Perc3 <- as.numeric(metadata[7,colnames(metadata) %in% Clust3])

t1_2<-wilcox.test(Perc1,Perc2)
print(paste0("The wilcox.test pvalue between Clust 1 and 2 is ",t1_2$p.value))
## [1] "The wilcox.test pvalue between Clust 1 and 2 is 0.148479991170565"
t2_3<-wilcox.test(Perc2,Perc3)
print(paste0("The wilcox.test pvalue between Clust 2 and 3 is ",t2_3$p.value))
## [1] "The wilcox.test pvalue between Clust 2 and 3 is 1.22370562537476e-06"
t1_3<-wilcox.test(Perc1,Perc3)
print(paste0("The wilcox.test pvalue between Clust 1 and 3 is ",t1_3$p.value))
## [1] "The wilcox.test pvalue between Clust 1 and 3 is 2.44741125074952e-06"
Perc1 <- data.frame(values=Perc1,clust="1")
Perc2 <- data.frame(values=Perc2,clust="2")
Perc3 <- data.frame(values=Perc3,clust="3")

Perc <- rbind(Perc1,Perc2,Perc3)

ggplot(Perc, aes(x=clust, y=values,color=clust,fill=clust))+ 
  geom_boxplot(width=0.4)+
  geom_jitter(shape=16,size=4)+
  scale_color_manual(values=c("dodgerblue", "orangered","gray50"))+
  scale_fill_manual(values=alpha(c("dodgerblue", "orangered","gray50"), .3))+
  theme_bw()

metadata<-data.frame(t(romics_proteins$metadata))

PC_coord<-data.frame(PCA_proteins$ind$coord[,1:2])
PC_coord<-cbind(PC_coord,log_surface_density=as.numeric(metadata$log_surface_density),percent_tissue=metadata$percent_tissue, disease_status=metadata$disease_status, disease_lvl=metadata$disease_lvl)
for (i in 1:4){PC_coord[,i]<-as.numeric(PC_coord[,i])}

library("viridis")

ggplot(PC_coord, aes(x=Dim.1,y=Dim.2,color=log_surface_density)) + 
  geom_point(size = 4)+scale_color_gradient(low="blue", high="red")+
  geom_text(label=round(PC_coord$log_surface_density,3),position = position_nudge(x = 7))+
  ggtitle("Principal Component Analysis (Surface density)") +
  xlab(paste0("PC1(",round(as.numeric(PCA_proteins$eig[1,2]),2),"%)"))+
  ylab(paste0("PC2(",round(as.numeric(PCA_proteins$eig[2,2]),2),"%)"))+
  theme_bw()+ scale_colour_viridis()

ggplot(PC_coord, aes(x=Dim.1,y=Dim.2,color=percent_tissue)) + 
  geom_point(size = 4)+scale_color_gradient(low="blue", high="red")+
  geom_text(label=round(PC_coord$percent_tissue,3),position = position_nudge(x = 7))+
  ggtitle("Principal Component Analysis (Percent_tissue)") +
  xlab(paste0("PC1(",round(as.numeric(PCA_proteins$eig[1,2]),2),"%)"))+
  ylab(paste0("PC2(",round(as.numeric(PCA_proteins$eig[2,2]),2),"%)"))+
  theme_bw()+ scale_colour_viridis()

ggplot(PC_coord, aes(x=Dim.1,y=Dim.2,color=disease_status))+ 
  geom_point(size = 4)+
  ggtitle("Principal Component Analysis") +
  xlab(paste0("PC1(",round(as.numeric(PCA_proteins$eig[1,2]),2),"%)"))+
  ylab(paste0("PC2(",round(as.numeric(PCA_proteins$eig[2,2]),2),"%)"))+
  theme_bw()

ggplot(PC_coord, aes(x=Dim.1,y=Dim.2,color=disease_lvl))+ 
  geom_point(size = 4)+
  ggtitle("Principal Component Analysis") +
  xlab(paste0("PC1(",round(as.numeric(PCA_proteins$eig[1,2]),2),"%)"))+
  ylab(paste0("PC2(",round(as.numeric(PCA_proteins$eig[2,2]),2),"%)"))+
  theme_bw()

For all cores we performed a trajectory analysis using the package SLICER

library("SLICER")
library("lle")

table_data<-t(romics_proteins$data)
proteins <- select_genes(table_data)
k = select_k(table_data[,proteins], kmin=2)
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
traj_lle = lle(table_data[,proteins], m=2, k)$Y
## finding neighbours
## calculating weights
## computing coordinates
print("the points of the trajectory are plotted")
## [1] "the points of the trajectory are plotted"
traj_graph = conn_knn_graph(traj_lle,5)
ends = find_extreme_cells(traj_graph, traj_lle)

start = 1

print("The trajectory connections were plotted")
## [1] "The trajectory connections were plotted"
distances = graph_process_distance(traj_graph,embedding = traj_lle,start = 1)

traj_lle_for_graph = as.data.frame(traj_lle)
colnames(traj_lle_for_graph) = c("Manifold_dim_1","Manifold_dim_2")
traj_lle_for_graph$Manifold_dim_1 = as.numeric(traj_lle_for_graph$Manifold_dim_1)
traj_lle_for_graph$Manifold_dim_2 = as.numeric(traj_lle_for_graph$Manifold_dim_2)
traj_lle_for_graph<- cbind(ROI= rownames(table_data), disease_lvl= t(romics_proteins$metadata[3,]), traj_lle_for_graph, branches =  assign_branches(traj_graph,start = 1))

print("The plots is colored by the log(surface density")
## [1] "The plots is colored by the log(surface density"
ggplot(traj_lle_for_graph,aes(x=Manifold_dim_1,Manifold_dim_2,color=disease_lvl))+ geom_point(size=1,)+ geom_text(label=traj_lle_for_graph$disease_lvl)+theme_ROP()

DT::datatable(traj_lle_for_graph)

5.4 Statistics

romics_proteins<-romicsMean(romics_proteins)
## [1] "The Statistics layer was added to your object"
## [1] "Means columns (*_mean) were added to the statistics"
romics_proteins<-romicsSd(romics_proteins)
## [1] "The standard deviation columns (*_sd) were added to the statistics"

First we will evaluate the proteins differentially expressed overall between the IPF and control donors

romics_proteins<-romicsANOVA(romics_proteins)
## [1] "The ANOVA columns (ANOVA_p and ANOVA_padj) were added to the statistics"
print(paste0(sum(romics_proteins$statistics$ANOVA_p<0.05), " proteins had an ANOVA p<0.05."))
## [1] "1304 proteins had an ANOVA p<0.05."
pval<-data.frame(ids=rownames(romics_proteins$statistics), p=romics_proteins$statistics$ANOVA_p)
ggplot(pval, aes(p)) + geom_histogram(binwidth = 0.01)+theme_ROP()+ggtitle("ANOVA p frequency plot")+geom_vline(xintercept=0.05,linetype="dashed", color = "red")

print(paste0(sum(romics_proteins$statistics$ANOVA_padj<0.05), " proteins had an ANOVA padjusted<0.05."))
## [1] "1227 proteins had an ANOVA padjusted<0.05."
pval<-data.frame(ids=rownames(romics_proteins$statistics), p=romics_proteins$statistics$ANOVA_padj)
ggplot(pval, aes(p)) + geom_histogram(binwidth = 0.01)+theme_ROP()+ggtitle("ANOVA padj frequency plot")+geom_vline(xintercept=0.05,linetype="dashed", color = "red")

A heatmap depicting the proteins passing an ANOVA p<0.05 is plotted, the clusters obtained were saved in the statistics.

romicsHeatmap(romics_proteins,variable_hclust_number = 2,ANOVA_filter = "p", p=0.05)

romics_proteins<-romicsVariableHclust(romics_proteins,clusters = 2,ANOVA_filter = "p",p= 0.05,plot = F)
## [1] "The columns hclust_clusters was added to the statistics"
romics_proteins<-romicsZscores(romics_proteins)
## [1] "Z_score_ columns were added to the statistics"

Ttests were also calculated

romics_proteins<-romicsTtest(romics_proteins,var.equal = F)
## [1] "T_test columns were added to the statistics"
romicsVolcano(romics_proteins,p_type = "p",min_fold_change = 0)
## [1] "'stat_type' was missing 't.test' were used by default"

Ttests were also calculated by level of IPF

romics_proteins<-romicsChangeFactor(romics_proteins,main_factor = "disease_lvl")
romics_proteins<-romicsTtest(romics_proteins,var.equal = F)
## [1] "T_test columns were added to the statistics"
romicsVolcano(romicsChangeIDs(romics_proteins,newIDs = "representative_gene"),p_type = "p",
              min_fold_change = 0,
              plot_type = "plotly",
              )
## [1] "'stat_type' was missing 't.test' were used by default"
romics_proteins<-romicsChangeFactor(romics_proteins,main_factor = "disease_status")

As there was a strong relation The list of consensus HDL proteins can be found there:https://homepages.uc.edu/~davidswm/HDLproteome.html We uploaded the HDL database and evaluated if our clusters were enriched in those proteins these are added to the HDL associated GO term.

HDL<-read.csv(file = "01_source_files/HDLProteomeList2020.csv")
GO_HDL<-data.frame( Uniprot_Accession=HDL$Acc...,Uniprot_ID=HDL$Abbrev.,GO_accession="34364",GO_description="high-density lipoprotein particle", GO_type="GO_CC")
UniProtTable_GO<-rbind(UniProtTable_GO,GO_HDL)

An enrichment analysis was performed for these two clusters generated

Clust1<-rownames(romics_proteins$statistics)[romics_proteins$statistics$hclust_clusters==1&!is.na(romics_proteins$statistics$hclust_clusters)]
Clust2<-rownames(romics_proteins$statistics)[romics_proteins$statistics$hclust_clusters==2&!is.na(romics_proteins$statistics$hclust_clusters)]

Universe<-rownames(romics_proteins$statistics)

Clust1<-sub(".*\\;","",Clust1)
Clust2<-sub(".*\\;","",Clust2)

Universe<-sub(".*\\;","",Universe)

Clust1_Enr<-cbind(enriched_in="Clust1",Enrich_EASE(Clust1,Universe))
## [1] "2023-10-16 14:33:14 : GO enrichment stated."
## [1] "Your <query> contained 0 UniProt_IDs and 1150 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1825 UniProt_Accession (some might be redundant)."
## [1] "2023-10-16 14:33:16 : KEGG enrichment stated."
## [1] "Your <query> contained 0 UniProt_IDs and 1150 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1825 UniProt_Accession (some might be redundant)."
## [1] "2023-10-16 14:33:16 : REACTOME enrichment stated."
## [1] "Your <query> contained 0 UniProt_IDs and 1150 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1825 UniProt_Accession (some might be redundant)."
Clust2_Enr<-cbind(enriched_in="Clust2",Enrich_EASE(Clust2,Universe))
## [1] "2023-10-16 14:33:17 : GO enrichment stated."
## [1] "Your <query> contained 0 UniProt_IDs and 91 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1825 UniProt_Accession (some might be redundant)."
## [1] "2023-10-16 14:33:17 : KEGG enrichment stated."
## [1] "Your <query> contained 0 UniProt_IDs and 91 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1825 UniProt_Accession (some might be redundant)."
## [1] "2023-10-16 14:33:17 : REACTOME enrichment stated."
## [1] "Your <query> contained 0 UniProt_IDs and 91 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1825 UniProt_Accession (some might be redundant)."
Enrichments <-enrfilter(rbind(Clust1_Enr,Clust2_Enr),pval_type = "pval",foldchange = 1,min_feature = 2 )

write.table(Enrichments,file = "03_output_files/Enrichments_ANOVA_clusters.txt",row.names = F, sep="\t")
DT::datatable(Enrichments)

Plots were generated for specific proteins.

Figs<-romicsChangeIDs(romics_proteins,newIDs = "representative_gene") 

library("ggpubr")
#APOA1
singleVariablePlot(Figs,variable = "APOA1")+ geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 9, label =formatC(Figs$statistics[grepl("APOA1",rownames(Figs$statistics)),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#APOA4
singleVariablePlot(Figs,variable = "APOA4")+ geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 4, label =formatC(Figs$statistics[grepl("APOA4",rownames(Figs$statistics)),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#APOL1
singleVariablePlot(Figs,variable ="APOL1")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 0, label =formatC(Figs$statistics[grepl("APOL1",rownames(Figs$statistics)),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#Complement C3
singleVariablePlot(Figs,variable ="C3")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 8, label =formatC(Figs$statistics["C3"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#Serum Amyloid A4

singleVariablePlot(Figs,variable ="SAA2-SAA4")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 4, label =formatC(Figs$statistics["SAA2-SAA4"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#Fibrinogen alpha chain

singleVariablePlot(Figs,variable ="FGA",title = "fibrinogen alpha chain")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 8, label =formatC(Figs$statistics["FGA"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#Haptoglobin
singleVariablePlot(Figs,variable ="HP",title = "haptoglobin")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 9, label =formatC(Figs$statistics["HP"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#kininogen-1
singleVariablePlot(Figs,variable ="KNG1",title = "haptoglobin")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 4, label =formatC(Figs$statistics["KNG1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#Clusterin
singleVariablePlot(Figs,variable ="CLU",title = "Clusterin")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 6, label =formatC(Figs$statistics["CLU"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#Caveolin-1
singleVariablePlot(Figs,variable ="CAV1")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 5, label =formatC(Figs$statistics["CAV1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#Caveolin-2
singleVariablePlot(Figs,variable ="CAV2")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 1, label =formatC(Figs$statistics["CAV2"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#PICALM
singleVariablePlot(Figs,variable ="PICALM")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 1, label =formatC(Figs$statistics["PICALM"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#PACSIN2
singleVariablePlot(Figs,variable ="PACSIN2")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 1, label =formatC(Figs$statistics["PACSIN2"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#alpha-1-anti-trypsin
singleVariablePlot(Figs,variable ="SERPINA1")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 1, label =formatC(Figs$statistics["SERPINA1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#HDL receptor related protein 1
singleVariablePlot(Figs,variable ="LRP1")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 1, label =formatC(Figs$statistics["LRP1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#PRX
singleVariablePlot(Figs,variable ="PRX")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 2, label =formatC(Figs$statistics["PRX"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#CA4
singleVariablePlot(Figs,variable ="CA4")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 3.5, label =formatC(Figs$statistics["CA4"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#AGER
singleVariablePlot(Figs,variable ="AGER")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 6, label =formatC(Figs$statistics["AGER"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#SCEL
singleVariablePlot(Figs,variable ="SCEL")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 3, label =formatC(Figs$statistics["SCEL"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#ABCA3
singleVariablePlot(Figs,variable ="ABCA3")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 2.5, label =formatC(Figs$statistics["SCEL"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#SFTPC
singleVariablePlot(Figs,variable ="SFTPC")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 1, label =formatC(Figs$statistics["SFTPC"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#HBB
singleVariablePlot(Figs,variable ="HBB")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 13, label =formatC(Figs$statistics["HBB"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#HBB
p11<-singleVariablePlot(Figs,variable ="HBB")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 13, label =formatC(Figs$statistics["HBB"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#COL14A1
p12<-singleVariablePlot(Figs,variable ="COL14A1")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 6.5, label =formatC(Figs$statistics["COL14A1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))



#SFTPC
singleVariablePlot(Figs,variable ="SFTPC")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 1, label =formatC(Figs$statistics["SFTPC"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#MFAP5
singleVariablePlot(Figs,variable ="MFAP5")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 3, label =formatC(Figs$statistics["MFAP5"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#uteroglobin
singleVariablePlot(Figs,variable ="SCGB1A1")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 6, label =formatC(Figs$statistics["SCGB1A1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#THY1
singleVariablePlot(Figs,variable ="THY1")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 2, label =formatC(Figs$statistics["THY1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#SYNPO2
p9<-singleVariablePlot(Figs,variable ="SYNPO2")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 2, label =formatC(Figs$statistics["SYNPO2"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#DES
p10<-singleVariablePlot(Figs,variable ="DES")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 6, label =formatC(Figs$statistics["DES"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#ACTA2/a-SMA
singleVariablePlot(Figs,variable ="ACTA2")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 6, label =formatC(Figs$statistics["ACTA2"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#to be finished

#COL1A1
singleVariablePlot(Figs,variable ="COL1A1")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 7, label =formatC(Figs$statistics["COL1A1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#COL3A1
singleVariablePlot(Figs,variable ="COL3A1")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 4, label =formatC(Figs$statistics["COL3A1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#COL15A1
singleVariablePlot(Figs,variable ="COL15A1")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 2, label =formatC(Figs$statistics["COL15A1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#TGFB1
singleVariablePlot(Figs,variable ="TGFBI")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 2, label =formatC(Figs$statistics["TGFBI"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#TGFB1
singleVariablePlot(Figs,variable ="TGFBI")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 2, label =formatC(Figs$statistics["TGFBI"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#TGFB1I1
singleVariablePlot(Figs,variable ="TGFB1I1")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 3.5, label =formatC(Figs$statistics["TGFB1I1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#CD14
singleVariablePlot(Figs,variable ="CD14")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 3.5, label =formatC(Figs$statistics["CD14"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#Antileukoproteinase
singleVariablePlot(Figs,variable ="SLPI")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 6, label =formatC(Figs$statistics["SLPI"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#Neutrophil defensin 3
singleVariablePlot(Figs,variable ="DEFA3")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 8, label =formatC(Figs$statistics["DEFA3"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#Chemoatractants

#Macrophage migration inhibitory factor 
singleVariablePlot(Figs,variable ="MIF")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 4, label =formatC(Figs$statistics["MIF"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

#High mobility group protein B1
singleVariablePlot(Figs,variable ="HMGB1")+
  geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 4, label =formatC(Figs$statistics["HMGB1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))

Enrichments were performed for the comparison IPF1/Ctrl

IPF1_up<-rownames(romics_proteins$statistics)[romics_proteins$statistics$IPF1_vs_Ctrl_Ttest_p<0.05&romics_proteins$statistics$`log(IPF1/Ctrl)`>0]
IPF1_down<-rownames(romics_proteins$statistics)[romics_proteins$statistics$IPF1_vs_Ctrl_Ttest_p<0.05&romics_proteins$statistics$`log(IPF1/Ctrl)`<0]
universe<-rownames(romics_proteins$statistics)

IPF1_up_GO<-cbind(Enriched_in="up_in_IPF1", UniProt_GO_EASE(IPF1_up,universe))
## [1] "64 proteins of your <query> list had more than one identifier, only the first one was conserved"
## [1] "1569 proteins of your <universe> list had more than one identifier, only the first one was conserved"
## [1] "Your <query> contained 0 UniProt_IDs and 74 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1894 UniProt_Accession (some might be redundant)."
IPF1_down_GO<-cbind(Enriched_in="down_in_IPF1",UniProt_GO_EASE(IPF1_down,universe))
## [1] "1053 proteins of your <query> list had more than one identifier, only the first one was conserved"
## [1] "1569 proteins of your <universe> list had more than one identifier, only the first one was conserved"
## [1] "Your <query> contained 0 UniProt_IDs and 1268 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1894 UniProt_Accession (some might be redundant)."
IPF1_up_KEGG<-cbind(Enriched_in="up_in_IPF1",UniProt_KEGG_EASE(IPF1_up,universe))
## [1] "64 proteins of your <query> list had more than one identifier, only the first one was conserved"
## [1] "1569 proteins of your <universe> list had more than one identifier, only the first one was conserved"
## [1] "Your <query> contained 0 UniProt_IDs and 74 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1894 UniProt_Accession (some might be redundant)."
IPF1_down_KEGG<-cbind(Enriched_in="down_in_IPF1",UniProt_KEGG_EASE(IPF1_down,universe))
## [1] "1053 proteins of your <query> list had more than one identifier, only the first one was conserved"
## [1] "1569 proteins of your <universe> list had more than one identifier, only the first one was conserved"
## [1] "Your <query> contained 0 UniProt_IDs and 1268 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1894 UniProt_Accession (some might be redundant)."
Enrichments_IPF1_ctrl<- rbind(IPF1_up_GO,IPF1_down_GO,IPF1_up_KEGG,IPF1_down_KEGG)

Enrichments_IPF1_ctrl <- Enrichments_IPF1_ctrl[Enrichments_IPF1_ctrl$pval<0.05&Enrichments_IPF1_ctrl$fold_change>1,]
DT::datatable(Enrichments_IPF1_ctrl)
write.table(Enrichments_IPF1_ctrl,"./03_output_files/Enrichment_early_IPF.txt",row.names = F,sep="\t")

Now we will look for proteins with a trend relationship between the surface density and the protein abundances, the controls were removed to avoid biasing the linearity

romics_IPF<-romicsTrend(romics_IPF,factor = "surface_density",log_factor = FALSE,type = "linear",p = 0.05)
## [1] "The Statistics layer was added to your object"
## [1] "The trend analysis columns were added to the statistics"
romicsTrendHeatmap(romics_IPF,log_factor = FALSE,labRow = FALSE,margins=c(10,5))

Now for these two trends we will perform an enrichment analysis to identify function associated with these trends

linear_increasing<-gsub("\\;.*","",rownames(romics_IPF$statistics)[romics_IPF$statistics$best_fitted_trend=="linear_increasing"])
linear_decreasing<-gsub("\\;.*","",rownames(romics_IPF$statistics)[romics_IPF$statistics$best_fitted_trend=="linear_decreasing"])
universe<-gsub("\\;.*","",rownames(romics_IPF$statistics))

linear_increasing_KEGG<-UniProt_KEGG_EASE(linear_increasing,universe)
## [1] "Your <query> contained 0 UniProt_IDs and 74 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1894 UniProt_Accession (some might be redundant)."
linear_decreasing_KEGG<-UniProt_KEGG_EASE(linear_decreasing,universe)
## [1] "Your <query> contained 0 UniProt_IDs and 270 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1894 UniProt_Accession (some might be redundant)."
linear_increasing_GO<-UniProt_GO_EASE(linear_increasing,universe)
## [1] "Your <query> contained 0 UniProt_IDs and 74 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1894 UniProt_Accession (some might be redundant)."
linear_decreasing_GO<-UniProt_GO_EASE(linear_decreasing,universe)
## [1] "Your <query> contained 0 UniProt_IDs and 270 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1894 UniProt_Accession (some might be redundant)."
linear_increasing_KEGG<-cbind(Cluster=rep("linear_increasing_KEGG",nrow(linear_increasing_KEGG)),linear_increasing_KEGG)
linear_decreasing_KEGG<-cbind(Cluster=rep("linear_decreasing_KEGG",nrow(linear_decreasing_KEGG)),linear_decreasing_KEGG)

linear_increasing_GO<-cbind(Cluster=rep("linear_increasing_GO",nrow(linear_increasing_GO)),linear_increasing_GO)
linear_decreasing_GO<-cbind(Cluster=rep("linear_decreasing_GO",nrow(linear_decreasing_GO)),linear_decreasing_GO)

Enrichments_trends <- rbind(linear_increasing_GO,linear_decreasing_GO,linear_increasing_KEGG,linear_decreasing_KEGG)
Enrichments_trends <- Enrichments_trends[Enrichments_trends$pval<0.1&Enrichments_trends$fold_change>1,]
DT::datatable(Enrichments_trends)
write.table(Enrichments_trends,"./03_output_files/Enrichment_trends.txt",row.names = F,sep="\t")

Examples of proteins mapped to trends.

singleVariableTrend(romics_IPF,"Q15109",title = "Rage/Ager(AT1)",log_factor = F)

singleVariableTrend(romics_IPF,"H3BTN5",title = "Pyruvate Kinase",log_factor = F)

singleVariableTrend(romics_IPF,"Q13751",title = "Laminin subunit beta-3",log_factor = F)

singleVariableTrend(romics_IPF,"O14558",title = "Heat shock protein beta-6",log_factor = F)

singleVariableTrend(romics_IPF,"Q05682",title = "Caldesmon",log_factor = F)

The final R object was finally exported

save(romics_IPF,file = "03_output_files/Romics_object.Rda")